Members:
Student numbers:
Be specific. Tasks can include coding (everyone is expected to do this), background research, conceptualisation, visualisation, data analysis, and data modelling.
To be determined:
Author 1 (Rody): Coding, communication
Author 2 (Hugo): Coding
Author 3 (Tim): Coding
Author 4 (Wouter): Coding
Author 5 (Jesse): Coding
Flight data USA in 2015:
Weather data:
IATA-codes list, including coordinates:
Passenger numbers of airports USA in 2015 (Will not be used in the project, out of scope):
The large dataset of flights and delays, which includes the airports involved and the airlines operating the flights, can be used to visualise where in America the delays took place in 2015. This can be animated per day using colours and lines that change over the year.
To clearly display the visualisations, there is also a data set with coordinates of the airports and their full names.
Finally, daily rainfall data is compared to the delays to see whether there is a relationship between the two.
Process:
%matplotlib notebook
from urllib.request import urlopen
from plotly.offline import init_notebook_mode
import plotly.express as px
import plotly.graph_objects as go
import json
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import plotly.io as pio
import geopandas as gpd
import math
from plotly.subplots import make_subplots
init_notebook_mode(connected=True)
pio.renderers.default = "plotly_mimetype+notebook"
# Load the flight records twice: `df` gets a daily "date" column (YEAR, MONTH
# and DAY combined) and `dM` a monthly one (YEAR and MONTH combined).
# The airport code columns hold both IATA strings and purely numeric codes
# (e.g. 14747), so they are read as strings; these are presumably the
# "Columns (7,8)" of the mixed-type DtypeWarning pandas emits otherwise.
df = pd.read_csv(
    'flights.csv',
    parse_dates={"date": ['YEAR', 'MONTH', 'DAY']},
    dtype={"ORIGIN_AIRPORT": str, "DESTINATION_AIRPORT": str},
)
dM = pd.read_csv(
    'flights.csv',
    parse_dates={"date": ['YEAR', 'MONTH']},
    dtype={"ORIGIN_AIRPORT": str, "DESTINATION_AIRPORT": str},
)
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\IPython\core\interactiveshell.py:3457: DtypeWarning: Columns (7,8) have mixed types.Specify dtype option on import or set low_memory=False.
# Use string dates as grouping keys / animation labels, and add a helper
# column of ones so that summing it yields the number of flights per group.
df['date'] = df['date'].astype(str)
df["count"] = np.ones(len(df))
dM['date'] = dM['date'].astype(str)
dM["count"] = np.ones(len(dM))

# Arrival delay per airline: AL is keyed on the daily dates of `df`,
# AL2 on the monthly dates of `dM`.
# numeric_only=True keeps the sums restricted to numeric columns instead of
# also trying to "sum" the string columns (tail numbers, airport codes, ...).
# NOTE(review): the average divides by *all* flights in the group; rows with
# a missing ARRIVAL_DELAY add nothing to the sum but still count in the
# denominator - confirm this is intended.
AL = df.groupby(["date", "AIRLINE"]).sum(numeric_only=True).reset_index()
AL["ArvCount"] = AL["ARRIVAL_DELAY"] / AL["count"]
AL2 = dM.groupby(["date", "AIRLINE"]).sum(numeric_only=True).reset_index()
AL2["ArvCount"] = AL2["ARRIVAL_DELAY"] / AL2["count"]

# Departure delay per origin airport, keyed on the monthly dates of `dM`.
AP = dM.groupby(["date", "ORIGIN_AIRPORT"]).sum(numeric_only=True).reset_index()
AP["DepCount"] = AP["DEPARTURE_DELAY"] / AP["count"]
# Filling the gaps - Month
# Build one row for every (month, airline) combination so that the animated
# chart has a value for each airline in each frame. Combinations that do not
# occur in AL2 get an average arrival delay of 0.
Month_unique = dM.date.unique()      # Setting required inputs
Airline_unique = dM.AIRLINE.unique()  # Setting required inputs

# Month-major ordering: for each month, every airline in order of appearance.
month_airline_grid = pd.MultiIndex.from_product([Month_unique, Airline_unique])

# Look the observed averages up by (date, airline) instead of scanning AL2
# once per grid row and writing through chained indexing, which raised
# SettingWithCopyWarning and is not guaranteed to modify the frame.
observed_delay = AL2.set_index(["date", "AIRLINE"])["ArvCount"]
Months_df = observed_delay.reindex(month_airline_grid).fillna(0.0).reset_index()
Months_df.columns = ["Date", "Airline", "Average arrival delay"]
# Too slow for all days
# Days_unique = df.date.unique() #Setting required inputs
# Airline_unique = df.AIRLINE.unique() #Setting required inputs
# a = []
# b = []
# for i in range(len(Days_unique)):
# for j in range(len(Airline_unique)):
# a.append(Days_unique[i])
# b.append(Airline_unique[j])
# c = np.zeros(len(a)) #All non-existing values will be 0, as they will not be overruled
# data = {'Date': a,
# 'Airline': b,
# 'Average arrival delay': c}
# Days_df = pd.DataFrame(data)
# for k in range(len(Days_df.Date)):
# for l in range(len(AL.date)):
# if Days_df.Date[k]==AL.date[l] and Days_df.Airline[k]==AL.AIRLINE[l]:
# Days_df["Average arrival delay"][k] = AL.ArvCount[l]
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:20: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Animated horizontal bar chart of the average arrival delay per airline,
# with one animation frame per value of the "Date" column.
bar_options = dict(
    y="Airline",
    x="Average arrival delay",
    animation_frame="Date",
    animation_group="Airline",
    color="Airline",
    range_x=[-20, 100],
)
fig = px.bar(Months_df, **bar_options)

# Order the bars by their total value.
fig.update_yaxes(categoryorder="total ascending")
fig.update_layout(
    showlegend=False,
    title="Average arrival delay of airlines",
    xaxis_title="Average arrival delay [min/flight]",
    yaxis_title="Airlines",
)

# Set the frame duration used by the first animation button
# (presumably the play button, with the duration in milliseconds - TODO confirm).
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 2000
fig.show()
# Reload the raw flight records (without the derived date/count columns).
# The airport code columns mix IATA strings with numeric codes, so they are
# read as strings; these are presumably the "Columns (7,8)" of the
# mixed-type DtypeWarning pandas emits otherwise.
df = pd.read_csv(
    'flights.csv',
    dtype={"ORIGIN_AIRPORT": str, "DESTINATION_AIRPORT": str},
)
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\IPython\core\interactiveshell.py:3457: DtypeWarning: Columns (7,8) have mixed types.Specify dtype option on import or set low_memory=False.
airports = df.ORIGIN_AIRPORT.unique()

### Choose Airport ###
Airport = "ATL"

# Only late arrivals count as delay: early arrivals and missing values
# contribute 0 minutes.
late_minutes = df["ARRIVAL_DELAY"].clip(lower=0).fillna(0)

# Flights leaving from / arriving at the chosen airport. Boolean masks
# replace the row-by-row Python loop over the whole flights table.
departing = df["ORIGIN_AIRPORT"] == Airport
arriving = df["DESTINATION_AIRPORT"] == Airport

# One row per flight leg, labelled with the airport at the *other* end.
# The stable sort on the original row number restores file order, with the
# departing entry before the arriving one if a row matches both masks.
legs = pd.concat([
    pd.DataFrame({"Airport": df.loc[departing, "DESTINATION_AIRPORT"],
                  "Delay": late_minutes[departing]}),
    pd.DataFrame({"Airport": df.loc[arriving, "ORIGIN_AIRPORT"],
                  "Delay": late_minutes[arriving]}),
]).sort_index(kind="mergesort")
fly = legs["Airport"].tolist()
delay = legs["Delay"].tolist()

# Per connected airport: number of flights ("cnt") and summed delay ("Delay").
# The column order cnt / Airport / Delay is kept as before.
Destinations = np.unique(fly)
per_destination = legs.groupby("Airport")["Delay"]
df_flights = pd.DataFrame(index=Destinations)
df_flights["cnt"] = per_destination.size().reindex(Destinations).to_numpy(dtype=float)
df_flights["Airport"] = Destinations
df_flights["Delay"] = per_destination.sum().reindex(Destinations).to_numpy(dtype=float)

display(df_flights)
| cnt | Airport | Delay | |
|---|---|---|---|
| ABE | 1802.0 | ABE | 20399.0 |
| ABQ | 1604.0 | ABQ | 15851.0 |
| ABY | 1752.0 | ABY | 23443.0 |
| ACY | 357.0 | ACY | 7197.0 |
| AEX | 2466.0 | AEX | 38170.0 |
| ... | ... | ... | ... |
| TVC | 66.0 | TVC | 586.0 |
| TYS | 3437.0 | TYS | 31852.0 |
| VLD | 1870.0 | VLD | 21826.0 |
| VPS | 4415.0 | VPS | 42826.0 |
| XNA | 3151.0 | XNA | 33355.0 |
170 rows × 3 columns
# Airport reference table; later cells read its IATA_CODE, AIRPORT,
# LATITUDE and LONGITUDE columns.
df_airports = pd.read_csv("airports.csv")
df_airports
| IATA_CODE | AIRPORT | CITY | STATE | COUNTRY | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|---|---|
| 0 | ABE | Lehigh Valley International Airport | Allentown | PA | USA | 40.65236 | -75.44040 |
| 1 | ABI | Abilene Regional Airport | Abilene | TX | USA | 32.41132 | -99.68190 |
| 2 | ABQ | Albuquerque International Sunport | Albuquerque | NM | USA | 35.04022 | -106.60919 |
| 3 | ABR | Aberdeen Regional Airport | Aberdeen | SD | USA | 45.44906 | -98.42183 |
| 4 | ABY | Southwest Georgia Regional Airport | Albany | GA | USA | 31.53552 | -84.19447 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 317 | WRG | Wrangell Airport | Wrangell | AK | USA | 56.48433 | -132.36982 |
| 318 | WYS | Westerly State Airport | West Yellowstone | MT | USA | 44.68840 | -111.11764 |
| 319 | XNA | Northwest Arkansas Regional Airport | Fayetteville/Springdale/Rogers | AR | USA | 36.28187 | -94.30681 |
| 320 | YAK | Yakutat Airport | Yakutat | AK | USA | 59.50336 | -139.66023 |
| 321 | YUM | Yuma International Airport | Yuma | AZ | USA | 32.65658 | -114.60597 |
322 rows × 7 columns
df_flights = df_flights.rename(columns={'Airport': 'IATA'})

# Lookup table keyed on IATA code. If a code were listed more than once the
# last entry wins, which is what the previous nested loop did as well.
airport_lookup = (
    df_airports.drop_duplicates("IATA_CODE", keep="last").set_index("IATA_CODE")
)

### Add the columns to the DataFrame ###
# Mapping through the lookup replaces the nested loop that wrote values via
# chained indexing (df[col][i] = ...), which raised SettingWithCopyWarning
# and is not guaranteed to modify the frame.
# Codes that are missing from airports.csv get an empty airport name and NaN
# coordinates (the coordinates were previously left as empty strings).
df_flights['start'] = Airport
df_flights['airport'] = df_flights['IATA'].map(airport_lookup['AIRPORT']).fillna('')
df_flights['end_lat'] = df_flights['IATA'].map(airport_lookup['LATITUDE'])
df_flights['end_lon'] = df_flights['IATA'].map(airport_lookup['LONGITUDE'])
df_flights['start_lat'] = df_flights['start'].map(airport_lookup['LATITUDE'])
df_flights['start_lon'] = df_flights['start'].map(airport_lookup['LONGITUDE'])

display(df_flights)
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:15: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:17: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:19: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:20: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| cnt | IATA | Delay | start | airport | end_lat | end_lon | start_lat | start_lon | |
|---|---|---|---|---|---|---|---|---|---|
| ABE | 1802.0 | ABE | 20399.0 | ATL | Lehigh Valley International Airport | 40.65236 | -75.4404 | 33.64044 | -84.42694 |
| ABQ | 1604.0 | ABQ | 15851.0 | ATL | Albuquerque International Sunport | 35.04022 | -106.60919 | 33.64044 | -84.42694 |
| ABY | 1752.0 | ABY | 23443.0 | ATL | Southwest Georgia Regional Airport | 31.53552 | -84.19447 | 33.64044 | -84.42694 |
| ACY | 357.0 | ACY | 7197.0 | ATL | Atlantic City International Airport | 39.45758 | -74.57717 | 33.64044 | -84.42694 |
| AEX | 2466.0 | AEX | 38170.0 | ATL | Alexandria International Airport | 31.32737 | -92.54856 | 33.64044 | -84.42694 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TVC | 66.0 | TVC | 586.0 | ATL | Cherry Capital Airport | 44.74144 | -85.58224 | 33.64044 | -84.42694 |
| TYS | 3437.0 | TYS | 31852.0 | ATL | McGhee Tyson Airport | 35.81249 | -83.99286 | 33.64044 | -84.42694 |
| VLD | 1870.0 | VLD | 21826.0 | ATL | Valdosta Regional Airport | 30.7825 | -83.27672 | 33.64044 | -84.42694 |
| VPS | 4415.0 | VPS | 42826.0 | ATL | Destin-Fort Walton Beach Airport/Eglin AFB | 30.48325 | -86.5254 | 33.64044 | -84.42694 |
| XNA | 3151.0 | XNA | 33355.0 | ATL | Northwest Arkansas Regional Airport | 36.28187 | -94.30681 | 33.64044 | -84.42694 |
170 rows × 9 columns
# Average delay per connection; used by the average-delay map.
df_flights["avg_delay"] = df_flights["Delay"] / df_flights["cnt"]

# Map of all connections of the chosen airport: the more flights on a
# connection, the more opaque its line.
fig = go.Figure()

# One small marker per airport in the reference table, labelled with its name.
fig.add_trace(go.Scattergeo(
    locationmode='USA-states',
    lon=df_airports['LONGITUDE'],
    lat=df_airports['LATITUDE'],
    hoverinfo='text',
    text=df_airports['AIRPORT'],
    mode='markers',
    marker=dict(
        size=2,
        color='rgb(255, 100, 100)',
        line=dict(
            width=3,
            color='rgba(100, 100, 100, 0)'
        )
    )))

# Iterate over the column values directly: df_flights is indexed by IATA
# code, so integer keys such as df_flights['cnt'][i] only worked through
# pandas' deprecated positional fallback. The maximum is computed once
# instead of on every iteration.
busiest_connection = float(df_flights['cnt'].max())
connections = zip(
    df_flights['start_lon'], df_flights['end_lon'],
    df_flights['start_lat'], df_flights['end_lat'],
    df_flights['cnt'],
)
for start_lon, end_lon, start_lat, end_lat, flight_count in connections:
    fig.add_trace(
        go.Scattergeo(
            locationmode='USA-states',
            lon=[start_lon, end_lon],
            lat=[start_lat, end_lat],
            mode='lines',
            line=dict(width=1, color='red'),
            opacity=float(flight_count) / busiest_connection,
        )
    )

fig.update_layout(
    title_text=f'All flights from {Airport} in 2015',
    showlegend=False,
    geo=dict(
        scope='north america',
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(200, 200, 200)',
        countrycolor='rgb(180, 180, 180)',
    ),
)
fig.show()
# Map of all connections of the chosen airport: the higher the average delay
# on a connection, the more opaque its line.
fig = go.Figure()

# One small marker per airport in the reference table, labelled with its name.
fig.add_trace(go.Scattergeo(
    locationmode='USA-states',
    lon=df_airports['LONGITUDE'],
    lat=df_airports['LATITUDE'],
    hoverinfo='text',
    text=df_airports['AIRPORT'],
    mode='markers',
    marker=dict(
        size=2,
        color='rgb(255, 100, 100)',
        line=dict(
            width=3,
            color='rgba(100, 100, 100, 0)'
        )
    )))

# Iterate over the column values directly: df_flights is indexed by IATA
# code, so integer keys such as df_flights['avg_delay'][i] only worked
# through pandas' deprecated positional fallback. The maximum is computed
# once instead of on every iteration.
worst_avg_delay = float(df_flights['avg_delay'].max())
connections = zip(
    df_flights['start_lon'], df_flights['end_lon'],
    df_flights['start_lat'], df_flights['end_lat'],
    df_flights['avg_delay'],
)
for start_lon, end_lon, start_lat, end_lat, avg_delay in connections:
    fig.add_trace(
        go.Scattergeo(
            locationmode='USA-states',
            lon=[start_lon, end_lon],
            lat=[start_lat, end_lat],
            mode='lines',
            line=dict(width=1, color='red'),
            opacity=float(avg_delay) / worst_avg_delay,
        )
    )

fig.update_layout(
    title_text=f'Average delay of flights from {Airport} in 2015',
    showlegend=False,
    geo=dict(
        scope='north america',
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(200, 200, 200)',
        countrycolor='rgb(180, 180, 180)',
    ),
)
fig.show()
# Reload the raw flight records.
# The airport code columns mix IATA strings with numeric codes, so they are
# read as strings; these are presumably the "Columns (7,8)" of the
# mixed-type DtypeWarning pandas emits otherwise.
df = pd.read_csv(
    'flights.csv',
    dtype={"ORIGIN_AIRPORT": str, "DESTINATION_AIRPORT": str},
)
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\IPython\core\interactiveshell.py:3457: DtypeWarning: Columns (7,8) have mixed types.Specify dtype option on import or set low_memory=False.
airports = df.ORIGIN_AIRPORT.unique()

# Flights whose arrival delay is at least 24 hours (24 * 60 minutes).
# A boolean mask replaces the row-by-row Python loop over the whole table;
# rows with a missing ARRIVAL_DELAY never satisfy the comparison.
day_long_delays = df[df["ARRIVAL_DELAY"] >= 24 * 60]
origin = day_long_delays["ORIGIN_AIRPORT"].tolist()
destination = day_long_delays["DESTINATION_AIRPORT"].tolist()

# One row per such flight, indexed by its origin airport code.
df_flights = pd.DataFrame(index=origin)
df_flights["Origin"] = origin
df_flights["Destination"] = destination
# Airport reference table; later cells read its IATA_CODE, AIRPORT,
# LATITUDE and LONGITUDE columns.
df_airports = pd.read_csv("airports.csv")
df_airports
| IATA_CODE | AIRPORT | CITY | STATE | COUNTRY | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|---|---|
| 0 | ABE | Lehigh Valley International Airport | Allentown | PA | USA | 40.65236 | -75.44040 |
| 1 | ABI | Abilene Regional Airport | Abilene | TX | USA | 32.41132 | -99.68190 |
| 2 | ABQ | Albuquerque International Sunport | Albuquerque | NM | USA | 35.04022 | -106.60919 |
| 3 | ABR | Aberdeen Regional Airport | Aberdeen | SD | USA | 45.44906 | -98.42183 |
| 4 | ABY | Southwest Georgia Regional Airport | Albany | GA | USA | 31.53552 | -84.19447 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 317 | WRG | Wrangell Airport | Wrangell | AK | USA | 56.48433 | -132.36982 |
| 318 | WYS | Westerly State Airport | West Yellowstone | MT | USA | 44.68840 | -111.11764 |
| 319 | XNA | Northwest Arkansas Regional Airport | Fayetteville/Springdale/Rogers | AR | USA | 36.28187 | -94.30681 |
| 320 | YAK | Yakutat Airport | Yakutat | AK | USA | 59.50336 | -139.66023 |
| 321 | YUM | Yuma International Airport | Yuma | AZ | USA | 32.65658 | -114.60597 |
322 rows × 7 columns
# IATA1 is the origin airport code, IATA2 the destination airport code.
df_flights = df_flights.rename(columns={'Origin': 'IATA1', 'Destination': 'IATA2'})

# Lookup table keyed on IATA code. If a code were listed more than once the
# last entry wins, which is what the previous nested loop did as well.
airport_lookup = (
    df_airports.drop_duplicates("IATA_CODE", keep="last").set_index("IATA_CODE")
)

### Add the coordinate columns to the DataFrame ###
# Mapping through the lookup replaces the nested loop that wrote values via
# chained indexing (df[col][i] = ...), which is not guaranteed to modify the
# frame. Codes that are missing from airports.csv (such as the numeric codes
# 14747 or 11298) get NaN coordinates; they were previously left as empty
# strings.
df_flights['end_lat'] = df_flights['IATA2'].map(airport_lookup['LATITUDE'])
df_flights['end_lon'] = df_flights['IATA2'].map(airport_lookup['LONGITUDE'])
df_flights['start_lat'] = df_flights['IATA1'].map(airport_lookup['LATITUDE'])
df_flights['start_lon'] = df_flights['IATA1'].map(airport_lookup['LONGITUDE'])

display(df_flights)
| IATA1 | IATA2 | end_lat | end_lon | start_lat | start_lon | |
|---|---|---|---|---|---|---|
| AUS | AUS | DFW | 32.89595 | -97.0372 | 30.19453 | -97.66987 |
| LAS | LAS | LAX | 33.94254 | -118.40807 | 36.08036 | -115.15233 |
| JAX | JAX | DFW | 32.89595 | -97.0372 | 30.49406 | -81.68786 |
| BHM | BHM | DFW | 32.89595 | -97.0372 | 33.56294 | -86.75355 |
| FAT | FAT | DFW | 32.89595 | -97.0372 | 36.77619 | -119.71814 |
| JFK | JFK | HNL | 21.31869 | -157.92241 | 40.63975 | -73.77893 |
| EGE | EGE | ORD | 41.9796 | -87.90446 | 39.64257 | -106.9177 |
| STL | STL | MIA | 25.79325 | -80.29056 | 38.74769 | -90.35999 |
| HNL | HNL | LAX | 33.94254 | -118.40807 | 21.31869 | -157.92241 |
| SAT | SAT | DFW | 32.89595 | -97.0372 | 29.53369 | -98.46978 |
| OMA | OMA | DFW | 32.89595 | -97.0372 | 41.30252 | -95.89417 |
| SMF | SMF | DFW | 32.89595 | -97.0372 | 38.69542 | -121.59077 |
| FLL | FLL | ORD | 41.9796 | -87.90446 | 26.07258 | -80.15275 |
| SAN | SAN | JFK | 40.63975 | -73.77893 | 32.73356 | -117.18966 |
| PNS | PNS | DFW | 32.89595 | -97.0372 | 30.47331 | -87.18744 |
| LAX | LAX | STL | 38.74769 | -90.35999 | 33.94254 | -118.40807 |
| SMF | SMF | DFW | 32.89595 | -97.0372 | 38.69542 | -121.59077 |
| SFO | SFO | DFW | 32.89595 | -97.0372 | 37.619 | -122.37484 |
| RIC | RIC | DFW | 32.89595 | -97.0372 | 37.50517 | -77.31967 |
| IND | IND | LAX | 33.94254 | -118.40807 | 39.71733 | -86.29438 |
| RIC | RIC | DFW | 32.89595 | -97.0372 | 37.50517 | -77.31967 |
| HNL | HNL | LAX | 33.94254 | -118.40807 | 21.31869 | -157.92241 |
| SAN | SAN | DFW | 32.89595 | -97.0372 | 32.73356 | -117.18966 |
| STX | STX | MIA | 25.79325 | -80.29056 | 17.70189 | -64.79856 |
| 14747 | 14747 | 11298 | ||||
| 11413 | 11413 | 11298 | ||||
| 11612 | 11612 | 13930 | ||||
| MCI | MCI | DFW | 32.89595 | -97.0372 | 39.29761 | -94.71391 |
| MCO | MCO | JFK | 40.63975 | -73.77893 | 28.42889 | -81.31603 |
| DTW | DTW | ORD | 41.9796 | -87.90446 | 42.21206 | -83.34884 |
| MSP | MSP | ORD | 41.9796 | -87.90446 | 44.88055 | -93.21692 |
| ABQ | ABQ | DFW | 32.89595 | -97.0372 | 35.04022 | -106.60919 |
# Map with one line per flight that was delayed by at least 24 hours.
fig = go.Figure()

# One small marker per airport in the reference table, labelled with its name.
airport_marker_style = dict(
    size=2,
    color='rgb(255, 100, 100)',
    line=dict(width=3, color='rgba(100, 100, 100, 0)'),
)
airport_markers = go.Scattergeo(
    locationmode='USA-states',
    lon=df_airports['LONGITUDE'],
    lat=df_airports['LATITUDE'],
    hoverinfo='text',
    text=df_airports['AIRPORT'],
    mode='markers',
    marker=airport_marker_style,
)
fig.add_trace(airport_markers)

flight_paths = []

# Columns are read by position: 2 = end_lat, 3 = end_lon,
# 4 = start_lat, 5 = start_lon.
for row in range(len(df_flights)):
    end_lat = df_flights.iloc[row, 2]
    end_lon = df_flights.iloc[row, 3]
    start_lat = df_flights.iloc[row, 4]
    start_lon = df_flights.iloc[row, 5]
    path = go.Scattergeo(
        locationmode='USA-states',
        lon=[start_lon, end_lon],
        lat=[start_lat, end_lat],
        mode='lines',
        line=dict(width=1, color='red'),
        opacity=1,
    )
    fig.add_trace(path)

map_layout = dict(
    scope='north america',
    projection_type='natural earth',
    showland=True,
    landcolor='rgb(200, 200, 200)',
    countrycolor='rgb(180, 180, 180)',
)
fig.update_layout(
    title_text='Flights in 2015 with more than 24 hour delay',
    showlegend=False,
    geo=map_layout,
)
fig.show()
# pip install openpyxl
# Flight records with a daily "date" column (YEAR, MONTH and DAY combined).
# The airport code columns mix IATA strings with numeric codes, so they are
# read as strings; these are presumably the "Columns (7,8)" of the
# mixed-type DtypeWarning pandas emits otherwise.
df = pd.read_csv(
    'flights.csv',
    parse_dates={"date": ['YEAR', 'MONTH', 'DAY']},
    dtype={"ORIGIN_AIRPORT": str, "DESTINATION_AIRPORT": str},
)
# Weather observations, indexed by the "airport" column (e.g. 'KHOU').
df3 = pd.read_excel('Weather data.xlsx', index_col='airport')
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\IPython\core\interactiveshell.py:3457: DtypeWarning: Columns (7,8) have mixed types.Specify dtype option on import or set low_memory=False.
# Weather rows for the KHOU station, plus the subset of days on which the
# recorded precipitation exceeds 20.
df_KHOU = df3.loc['KHOU']
heavy_rain_days = df_KHOU[df_KHOU.actual_precipitation > 20]

# Plot of KHOU: precipitation bars with the heavy-rain days marked.
fig = go.Figure()
precipitation_bars = go.Bar(
    x=df_KHOU.date,
    y=df_KHOU.actual_precipitation,
    name='Actual precipitation',
)
heavy_rain_markers = go.Scatter(
    x=heavy_rain_days.date,
    y=heavy_rain_days.actual_precipitation,
    mode='markers',
    name='Precipitation above 20 inch',
)
fig.add_trace(precipitation_bars)
fig.add_trace(heavy_rain_markers)
fig.update_layout(
    xaxis=dict(title="Date"),
    yaxis=dict(title="Actual precipitation [inches]"),
)
fig.update_layout(title_text="Actual precipitation KHOU")
fig.show()

# Create figure with secondary y-axis: precipitation on the primary axis,
# mean temperature on the secondary one.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Bar(
        x=df_KHOU.date,
        y=df_KHOU.actual_precipitation,
        name='Actual precipitation',
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=heavy_rain_days.date,
        y=heavy_rain_days.actual_precipitation,
        mode='markers',
        name='Precipitation above 20 inch',
    )
)
fig.add_trace(
    go.Scatter(
        x=df_KHOU.date,
        y=df_KHOU.actual_mean_temp,
        name="Actual mean temp",
    ),
    secondary_y=True,
)

# Add figure title
fig.update_layout(title_text="Temperature and precipitation KHOU")

# Set x-axis title
fig.update_xaxes(title_text="Date")

# Set y-axes titles
fig.update_yaxes(title_text="Precipitation [Inches]", secondary_y=False)
fig.update_yaxes(title_text="Temperature [F]", secondary_y=True)
fig.show()
Airport = "HOU"
df['date'] = df['date'].astype(str)

# Flights departing from the chosen airport. Only late arrivals count as
# delay: early arrivals and missing values contribute 0 minutes.
departures = df[df["ORIGIN_AIRPORT"] == Airport]
late_minutes = departures["ARRIVAL_DELAY"].clip(lower=0).fillna(0)
delay = [[day, minutes] for day, minutes in zip(departures["date"], late_minutes)]

# Per calendar day: total delay and number of departing flights. A groupby
# replaces the (days x flights) nested loop that accumulated values through
# chained indexing (SettingWithCopyWarning). Days without a departure from
# this airport get 0 / 0, so their average is NaN, as before.
dates = df['date'].unique()
per_day = (
    late_minutes.groupby(departures["date"])
    .agg(["sum", "size"])
    .reindex(dates, fill_value=0)
)
df2 = pd.DataFrame(index=dates)
df2['Total_Delay'] = per_day["sum"].to_numpy()
df2['Flights'] = per_day["size"].to_numpy()
df2['Average_Delay'] = df2['Total_Delay'] / df2['Flights']

# Rainfall is attached by row position: the i-th weather row goes to the
# i-th date. The column is numeric; rows without a weather observation hold
# NaN (they previously held the placeholder string ' ').
# NOTE(review): this assumes df_KHOU starts on the same day as the flight
# data and has one row per day in the same order - confirm against
# df_KHOU.date, otherwise rainfall and delays are paired with the wrong days.
rain = df_KHOU["actual_precipitation"].to_numpy(dtype=float)
rainfall = np.full(len(df2), np.nan)
rainfall[:len(rain)] = rain[:len(df2)]
df2['Rainfall'] = rainfall

# Keep only the days that have a weather observation.
df4 = df2.iloc[0:len(df_KHOU)]
C:\Users\thade\Miniconda3\envs\TIL6022\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Side-by-side daily bars: rainfall on the left axis, average delay on a
# twin right axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()
df4.Rainfall.plot(kind='bar', color='red', ax=ax, position=1)
df4.Average_Delay.plot(kind='bar', color='blue', ax=ax2, position=0)
ax.set_ylabel('Rainfall [inches]')
ax2.set_ylabel('Average Delay [minutes]')

# Full airport name for the title. Fall back to the IATA code when it is not
# listed in airports.csv; previously Airport_name was left undefined (or
# stale from an earlier run) in that case.
matching_names = df_airports.loc[df_airports['IATA_CODE'] == Airport, 'AIRPORT']
Airport_name = matching_names.iloc[-1] if not matching_names.empty else Airport
plt.title(f'Delay and rainfall per day at {Airport_name} in 2015 ')

# Keep only every n-th tick label on the x-axis.
n = 10
ticks = ax.xaxis.get_ticklocs()
ticklabels = ax.xaxis.get_ticklabels()
ax.xaxis.set_ticks(ticks[::n])
ax.xaxis.set_ticklabels(ticklabels[::n])
ax.legend(loc=1)
ax2.legend(loc=2)

# Show only the bars at positions 50 to 150.
plt.xlim(50, 150)
plt.show()
#CODE
To be determined
To be determined
#END